output: html_document: toc: false depth: 3 theme: paper highlight: tango —

options(width = 400)

Final-Project-Data-Science

4. Cox Proportional Hazards Models and interactions by study group

library("survival")
library("survey")
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library("haven")
# loading the dataset
ckd_studies <- read_dta("final_project.dta")
ckd_studies$study<-as.factor(ckd_studies$group)   
# Cox PH Model for ESRD:
   
# Strategy I: backwards selection:
# Full model with all interactions:
mfit_esrd <-coxph (Surv(tesrd_, esrd_) ~ study*sex + study*age + study*bmi + study*sbp + study*dm + study*cvd + study*egfr + study*log_uac, ckd_studies)
summary(mfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## 
##   n= 12986, number of events= 1519 
## 
##                      coef  exp(coef)   se(coef)       z Pr(>|z|)    
## study2         -4.8112913  0.0081373  1.1102355  -4.334 1.47e-05 ***
## study3         -1.2616258  0.2831932  0.7433887  -1.697 0.089672 .  
## study4          2.7247650 15.2528297  0.7404939   3.680 0.000234 ***
## sex            -0.1405501  0.8688801  0.1232680  -1.140 0.254203    
## age            -0.0289897  0.9714265  0.0045384  -6.388 1.68e-10 ***
## bmi            -0.0277058  0.9726745  0.0092757  -2.987 0.002818 ** 
## sbp             0.0097173  1.0097647  0.0029637   3.279 0.001042 ** 
## dm              0.2118823  1.2360024  0.1256403   1.686 0.091715 .  
## cvd             0.2440388  1.2763939  0.1210930   2.015 0.043873 *  
## egfr           -0.1088785  0.8968394  0.0090624 -12.014  < 2e-16 ***
## log_uac         0.4292837  1.5361567  0.0411901  10.422  < 2e-16 ***
## study2:sex     -0.6466545  0.5237952  0.2081214  -3.107 0.001889 ** 
## study3:sex     -0.2559969  0.7741444  0.1520468  -1.684 0.092245 .  
## study4:sex     -0.4222023  0.6556014  0.1612808  -2.618 0.008850 ** 
## study2:age      0.0321317  1.0326535  0.0081688   3.933 8.37e-05 ***
## study3:age      0.0008301  1.0008304  0.0061410   0.135 0.892476    
## study4:age     -0.0093771  0.9906667  0.0057933  -1.619 0.105532    
## study2:bmi      0.0261049  1.0264486  0.0220721   1.183 0.236925    
## study3:bmi      0.0192229  1.0194088  0.0109127   1.762 0.078150 .  
## study4:bmi     -0.0074489  0.9925787  0.0133716  -0.557 0.577479    
## study2:sbp     -0.0040549  0.9959533  0.0050174  -0.808 0.418994    
## study3:sbp      0.0018458  1.0018475  0.0035500   0.520 0.603094    
## study4:sbp     -0.0028459  0.9971582  0.0036722  -0.775 0.438360    
## study2:dm      -0.2532843  0.7762471  0.1974285  -1.283 0.199521    
## study3:dm       0.0346494  1.0352567  0.1595444   0.217 0.828070    
## study4:dm       0.3927115  1.4809910  0.1611744   2.437 0.014828 *  
## study2:cvd     -0.2018263  0.8172369  0.1963579  -1.028 0.304021    
## study3:cvd     -0.0291416  0.9712789  0.1519759  -0.192 0.847937    
## study4:cvd     -0.5268301  0.5904738  0.1771743  -2.974 0.002944 ** 
## study2:egfr    -0.0076807  0.9923487  0.0136298  -0.564 0.573079    
## study3:egfr     0.0274262  1.0278058  0.0101993   2.689 0.007166 ** 
## study4:egfr     0.0142355  1.0143373  0.0105120   1.354 0.175669    
## study2:log_uac  0.3889912  1.4754915  0.0892648   4.358 1.31e-05 ***
## study3:log_uac  0.0657820  1.0679939  0.0502721   1.309 0.190697    
## study4:log_uac -0.2761908  0.7586682  0.0433858  -6.366 1.94e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## study2          0.008137  122.89021 0.0009235    0.0717
## study3          0.283193    3.53116 0.0659643    1.2158
## study4         15.252830    0.06556 3.5730644   65.1118
## sex             0.868880    1.15091 0.6823920    1.1063
## age             0.971426    1.02941 0.9628239    0.9801
## bmi             0.972674    1.02809 0.9551510    0.9905
## sbp             1.009765    0.99033 1.0039163    1.0156
## dm              1.236002    0.80906 0.9662157    1.5811
## cvd             1.276394    0.78346 1.0067234    1.6183
## egfr            0.896839    1.11503 0.8810505    0.9129
## log_uac         1.536157    0.65098 1.4170149    1.6653
## study2:sex      0.523795    1.90914 0.3483439    0.7876
## study3:sex      0.774144    1.29175 0.5746448    1.0429
## study4:sex      0.655601    1.52532 0.4779224    0.8993
## study2:age      1.032654    0.96838 1.0162518    1.0493
## study3:age      1.000830    0.99917 0.9888565    1.0129
## study4:age      0.990667    1.00942 0.9794815    1.0020
## study2:bmi      1.026449    0.97423 0.9829906    1.0718
## study3:bmi      1.019409    0.98096 0.9978368    1.0414
## study4:bmi      0.992579    1.00748 0.9669033    1.0189
## study2:sbp      0.995953    1.00406 0.9862073    1.0058
## study3:sbp      1.001848    0.99816 0.9949011    1.0088
## study4:sbp      0.997158    1.00285 0.9900070    1.0044
## study2:dm       0.776247    1.28825 0.5271675    1.1430
## study3:dm       1.035257    0.96594 0.7572575    1.4153
## study4:dm       1.480991    0.67522 1.0798428    2.0312
## study2:cvd      0.817237    1.22364 0.5561703    1.2008
## study3:cvd      0.971279    1.02957 0.7210774    1.3083
## study4:cvd      0.590474    1.69356 0.4172435    0.8356
## study2:egfr     0.992349    1.00771 0.9661902    1.0192
## study3:egfr     1.027806    0.97295 1.0074637    1.0486
## study4:egfr     1.014337    0.98587 0.9936526    1.0355
## study2:log_uac  1.475492    0.67774 1.2386665    1.7576
## study3:log_uac  1.067994    0.93633 0.9677809    1.1786
## study4:log_uac  0.758668    1.31810 0.6968219    0.8260
## 
## Concordance= 0.873  (se = 0.004 )
## Rsquare= 0.225   (max possible= 0.884 )
## Likelihood ratio test= 3316  on 35 df,   p=<2e-16
## Wald test            = 2582  on 35 df,   p=<2e-16
## Score (logrank) test = 3660  on 35 df,   p=<2e-16
# test overall effect of the multilevel interactions:
regTermTest(mfit_esrd, "study:sex")
## Wald test for study:sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  38.09898  on  3  and  12951  df: p= < 2.22e-16
regTermTest(mfit_esrd, "study:age")
## Wald test for study:age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  8.492016  on  3  and  12951  df: p= 1.242e-05
regTermTest(mfit_esrd, "study:bmi")
## Wald test for study:bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  2.167538  on  3  and  12951  df: p= 0.089614
regTermTest(mfit_esrd, "study:sbp")
## Wald test for study:sbp
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  0.6897026  on  3  and  12951  df: p= 0.55821
regTermTest(mfit_esrd, "study:dm")
## Wald test for study:dm
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  1.100864  on  3  and  12951  df: p= 0.34732
regTermTest(mfit_esrd, "study:cvd")
## Wald test for study:cvd
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  2.329112  on  3  and  12951  df: p= 0.072354
regTermTest(mfit_esrd, "study:egfr")
## Wald test for study:egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  7.642564  on  3  and  12951  df: p= 4.22e-05
regTermTest(mfit_esrd, "study:log_uac")
## Wald test for study:log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + study * sbp + study * dm + study * cvd + study * 
##     egfr + study * log_uac, data = ckd_studies)
## F =  6.928795  on  3  and  12951  df: p= 0.00011748
# Backwards selection for significant interactions (threshold to remove: p<0.1) 
# Removing sbp
mfit_esrd <-coxph (Surv(tesrd_, esrd_) ~ study*sex + study*age + study*bmi + sbp + study*dm + study*cvd + study*egfr + study*log_uac, ckd_studies)
summary(mfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## 
##   n= 12986, number of events= 1519 
## 
##                     coef exp(coef)  se(coef)       z Pr(>|z|)    
## study2         -5.125651  0.005942  1.036735  -4.944 7.65e-07 ***
## study3         -1.143368  0.318744  0.685314  -1.668 0.095240 .  
## study4          2.438202 11.452435  0.657845   3.706 0.000210 ***
## sex            -0.139892  0.869452  0.123244  -1.135 0.256341    
## age            -0.028809  0.971602  0.004469  -6.447 1.14e-10 ***
## bmi            -0.027572  0.972805  0.009261  -2.977 0.002909 ** 
## sbp             0.009108  1.009150  0.001237   7.366 1.76e-13 ***
## dm              0.216023  1.241131  0.124136   1.740 0.081821 .  
## cvd             0.243558  1.275780  0.121096   2.011 0.044296 *  
## egfr           -0.108912  0.896809  0.009062 -12.019  < 2e-16 ***
## log_uac         0.431860  1.540119  0.039410  10.958  < 2e-16 ***
## study2:sex     -0.648822  0.522661  0.207996  -3.119 0.001812 ** 
## study3:sex     -0.245007  0.782699  0.151761  -1.614 0.106435    
## study4:sex     -0.419775  0.657195  0.161245  -2.603 0.009232 ** 
## study2:age      0.031234  1.031727  0.008066   3.872 0.000108 ***
## study3:age      0.001918  1.001920  0.005994   0.320 0.748965    
## study4:age     -0.009896  0.990153  0.005719  -1.731 0.083538 .  
## study2:bmi      0.023543  1.023822  0.021891   1.075 0.282175    
## study3:bmi      0.018907  1.019087  0.010903   1.734 0.082898 .  
## study4:bmi     -0.009057  0.990984  0.013307  -0.681 0.496125    
## study2:dm      -0.263784  0.768140  0.196369  -1.343 0.179173    
## study3:dm       0.046088  1.047167  0.157622   0.292 0.769983    
## study4:dm       0.383529  1.467455  0.159815   2.400 0.016403 *  
## study2:cvd     -0.203581  0.815804  0.196358  -1.037 0.299836    
## study3:cvd     -0.027274  0.973094  0.152031  -0.179 0.857624    
## study4:cvd     -0.529061  0.589158  0.177018  -2.989 0.002801 ** 
## study2:egfr    -0.007648  0.992381  0.013639  -0.561 0.574983    
## study3:egfr     0.027893  1.028285  0.010198   2.735 0.006235 ** 
## study4:egfr     0.014051  1.014150  0.010515   1.336 0.181481    
## study2:log_uac  0.371409  1.449776  0.086510   4.293 1.76e-05 ***
## study3:log_uac  0.074758  1.077624  0.047577   1.571 0.116106    
## study4:log_uac -0.279634  0.756061  0.041629  -6.717 1.85e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## study2          0.005942  168.28362 0.0007789   0.04533
## study3          0.318744    3.13732 0.0831958   1.22119
## study4         11.452435    0.08732 3.1545648  41.57730
## sex             0.869452    1.15015 0.6828740   1.10701
## age             0.971602    1.02923 0.9631287   0.98015
## bmi             0.972805    1.02796 0.9553061   0.99062
## sbp             1.009150    0.99093 1.0067069   1.01160
## dm              1.241131    0.80572 0.9730902   1.58300
## cvd             1.275780    0.78383 1.0062337   1.61753
## egfr            0.896809    1.11506 0.8810220   0.91288
## log_uac         1.540119    0.64930 1.4256363   1.66380
## study2:sex      0.522661    1.91329 0.3476754   0.78572
## study3:sex      0.782699    1.27763 0.5813198   1.05384
## study4:sex      0.657195    1.52162 0.4791174   0.90146
## study2:age      1.031727    0.96925 1.0155444   1.04817
## study3:age      1.001920    0.99808 0.9902184   1.01376
## study4:age      0.990153    1.00995 0.9791168   1.00131
## study2:bmi      1.023822    0.97673 0.9808229   1.06871
## study3:bmi      1.019087    0.98127 0.9975406   1.04110
## study4:bmi      0.990984    1.00910 0.9654729   1.01717
## study2:dm       0.768140    1.30185 0.5227457   1.12873
## study3:dm       1.047167    0.95496 0.7688604   1.42621
## study4:dm       1.467455    0.68145 1.0728266   2.00724
## study2:cvd      0.815804    1.22578 0.5551946   1.19874
## study3:cvd      0.973094    1.02765 0.7223464   1.31088
## study4:cvd      0.589158    1.69734 0.4164410   0.83351
## study2:egfr     0.992381    1.00768 0.9662044   1.01927
## study3:egfr     1.028285    0.97249 1.0079365   1.04904
## study4:egfr     1.014150    0.98605 0.9934625   1.03527
## study2:log_uac  1.449776    0.68976 1.2236669   1.71767
## study3:log_uac  1.077624    0.92797 0.9816799   1.18294
## study4:log_uac  0.756061    1.32265 0.6968215   0.82034
## 
## Concordance= 0.873  (se = 0.004 )
## Rsquare= 0.225   (max possible= 0.884 )
## Likelihood ratio test= 3313  on 32 df,   p=<2e-16
## Wald test            = 2565  on 32 df,   p=<2e-16
## Score (logrank) test = 3603  on 32 df,   p=<2e-16
regTermTest(mfit_esrd, "study:sex")
## Wald test for study:sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  41.92132  on  3  and  12954  df: p= < 2.22e-16
regTermTest(mfit_esrd, "study:age")
## Wald test for study:age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  8.079057  on  3  and  12954  df: p= 2.2522e-05
regTermTest(mfit_esrd, "study:bmi")
## Wald test for study:bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  2.213273  on  3  and  12954  df: p= 0.084362
regTermTest(mfit_esrd, "study:dm")
## Wald test for study:dm
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  1.187018  on  3  and  12954  df: p= 0.31297
regTermTest(mfit_esrd, "study:cvd")
## Wald test for study:cvd
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  2.282168  on  3  and  12954  df: p= 0.077007
regTermTest(mfit_esrd, "study:egfr")
## Wald test for study:egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  7.807495  on  3  and  12954  df: p= 3.3291e-05
regTermTest(mfit_esrd, "study:log_uac")
## Wald test for study:log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + study * dm + study * cvd + study * egfr + 
##     study * log_uac, data = ckd_studies)
## F =  6.660485  on  3  and  12954  df: p= 0.00017245
# Removing diabetes
mfit_esrd <-coxph (Surv(tesrd_, esrd_) ~ study*sex + study*age + study*bmi + sbp + dm + study*cvd + study*egfr + study*log_uac, ckd_studies)
summary(mfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## 
##   n= 12986, number of events= 1519 
## 
##                     coef exp(coef)  se(coef)       z Pr(>|z|)    
## study2         -4.611427  0.009938  0.991619  -4.650 3.31e-06 ***
## study3         -1.207717  0.298879  0.670000  -1.803 0.071457 .  
## study4          2.277588  9.753131  0.646936   3.521 0.000431 ***
## sex            -0.135609  0.873184  0.123202  -1.101 0.271026    
## age            -0.029503  0.970928  0.004412  -6.687 2.28e-11 ***
## bmi            -0.029467  0.970963  0.009071  -3.248 0.001161 ** 
## sbp             0.009029  1.009070  0.001234   7.315 2.58e-13 ***
## dm              0.316922  1.372896  0.057300   5.531 3.19e-08 ***
## cvd             0.233835  1.263436  0.120548   1.940 0.052408 .  
## egfr           -0.109756  0.896052  0.009041 -12.139  < 2e-16 ***
## log_uac         0.426013  1.531140  0.038772  10.988  < 2e-16 ***
## study2:sex     -0.622265  0.536728  0.207549  -2.998 0.002716 ** 
## study3:sex     -0.247560  0.780704  0.151700  -1.632 0.102701    
## study4:sex     -0.430072  0.650462  0.161105  -2.670 0.007596 ** 
## study2:age      0.028544  1.028955  0.007921   3.604 0.000314 ***
## study3:age      0.002227  1.002229  0.005891   0.378 0.705412    
## study4:age     -0.008378  0.991657  0.005630  -1.488 0.136743    
## study2:bmi      0.016708  1.016848  0.021462   0.778 0.436281    
## study3:bmi      0.020230  1.020436  0.010653   1.899 0.057561 .  
## study4:bmi     -0.003954  0.996054  0.013049  -0.303 0.761887    
## study2:cvd     -0.224655  0.798792  0.195277  -1.150 0.249961    
## study3:cvd     -0.025661  0.974665  0.150780  -0.170 0.864860    
## study4:cvd     -0.500955  0.605952  0.176425  -2.839 0.004519 ** 
## study2:egfr    -0.006347  0.993673  0.013624  -0.466 0.641303    
## study3:egfr     0.028560  1.028972  0.010167   2.809 0.004967 ** 
## study4:egfr     0.016098  1.016228  0.010462   1.539 0.123874    
## study2:log_uac  0.320198  1.377401  0.081133   3.947 7.93e-05 ***
## study3:log_uac  0.076877  1.079910  0.046315   1.660 0.096942 .  
## study4:log_uac -0.266923  0.765732  0.040845  -6.535 6.36e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## study2          0.009938   100.6277  0.001423    0.0694
## study3          0.298879     3.3458  0.080388    1.1112
## study4          9.753131     0.1025  2.744550   34.6591
## sex             0.873184     1.1452  0.685861    1.1117
## age             0.970928     1.0299  0.962568    0.9794
## bmi             0.970963     1.0299  0.953852    0.9884
## sbp             1.009070     0.9910  1.006631    1.0115
## dm              1.372896     0.7284  1.227053    1.5361
## cvd             1.263436     0.7915  0.997568    1.6002
## egfr            0.896052     1.1160  0.880314    0.9121
## log_uac         1.531140     0.6531  1.419097    1.6520
## study2:sex      0.536728     1.8631  0.357345    0.8062
## study3:sex      0.780704     1.2809  0.579907    1.0510
## study4:sex      0.650462     1.5374  0.474339    0.8920
## study2:age      1.028955     0.9719  1.013104    1.0451
## study3:age      1.002229     0.9978  0.990724    1.0139
## study4:age      0.991657     1.0084  0.980775    1.0027
## study2:bmi      1.016848     0.9834  0.974962    1.0605
## study3:bmi      1.020436     0.9800  0.999351    1.0420
## study4:bmi      0.996054     1.0040  0.970902    1.0219
## study2:cvd      0.798792     1.2519  0.544771    1.1713
## study3:cvd      0.974665     1.0260  0.725290    1.3098
## study4:cvd      0.605952     1.6503  0.428810    0.8563
## study2:egfr     0.993673     1.0064  0.967490    1.0206
## study3:egfr     1.028972     0.9718  1.008671    1.0497
## study4:egfr     1.016228     0.9840  0.995603    1.0373
## study2:log_uac  1.377401     0.7260  1.174898    1.6148
## study3:log_uac  1.079910     0.9260  0.986197    1.1825
## study4:log_uac  0.765732     1.3059  0.706821    0.8296
## 
## Concordance= 0.873  (se = 0.004 )
## Rsquare= 0.224   (max possible= 0.884 )
## Likelihood ratio test= 3298  on 29 df,   p=<2e-16
## Wald test            = 2582  on 29 df,   p=<2e-16
## Score (logrank) test = 3602  on 29 df,   p=<2e-16
regTermTest(mfit_esrd, "study:sex")
## Wald test for study:sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  41.82462  on  3  and  12957  df: p= < 2.22e-16
regTermTest(mfit_esrd, "study:age")
## Wald test for study:age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  7.394413  on  3  and  12957  df: p= 6.0269e-05
regTermTest(mfit_esrd, "study:bmi")
## Wald test for study:bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  2.004974  on  3  and  12957  df: p= 0.11094
regTermTest(mfit_esrd, "study:cvd")
## Wald test for study:cvd
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  0.540694  on  3  and  12957  df: p= 0.6544
regTermTest(mfit_esrd, "study:egfr")
## Wald test for study:egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  7.504244  on  3  and  12957  df: p= 5.1476e-05
regTermTest(mfit_esrd, "study:log_uac")
## Wald test for study:log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + study * cvd + study * egfr + study * 
##     log_uac, data = ckd_studies)
## F =  5.941644  on  3  and  12957  df: p= 0.0004806
# Removing CVD: FINAL MODEL:
mfit_esrd <-coxph (Surv(tesrd_, esrd_) ~ study*sex + study*age + study*bmi + sbp + dm + cvd + study*egfr + study*log_uac, ckd_studies)
summary(mfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## 
##   n= 12986, number of events= 1519 
## 
##                     coef exp(coef)  se(coef)       z Pr(>|z|)    
## study2         -4.491362  0.011205  0.982117  -4.573 4.80e-06 ***
## study3         -1.217113  0.296084  0.665190  -1.830 0.067292 .  
## study4          2.482080 11.966126  0.642096   3.866 0.000111 ***
## sex            -0.142297  0.867363  0.123076  -1.156 0.247611    
## age            -0.027895  0.972491  0.004246  -6.570 5.04e-11 ***
## bmi            -0.029550  0.970882  0.009053  -3.264 0.001099 ** 
## sbp             0.008968  1.009008  0.001234   7.270 3.60e-13 ***
## dm              0.321865  1.379699  0.057241   5.623 1.88e-08 ***
## cvd             0.080425  1.083748  0.058011   1.386 0.165629    
## egfr           -0.109294  0.896467  0.009016 -12.122  < 2e-16 ***
## log_uac         0.428157  1.534427  0.038761  11.046  < 2e-16 ***
## study2:sex     -0.609091  0.543845  0.207108  -2.941 0.003272 ** 
## study3:sex     -0.244275  0.783272  0.151587  -1.611 0.107082    
## study4:sex     -0.386187  0.679643  0.160538  -2.406 0.016147 *  
## study2:age      0.026441  1.026794  0.007746   3.413 0.000642 ***
## study3:age      0.002349  1.002351  0.005599   0.419 0.674851    
## study4:age     -0.012175  0.987899  0.005418  -2.247 0.024636 *  
## study2:bmi      0.015735  1.015860  0.021383   0.736 0.461816    
## study3:bmi      0.020721  1.020937  0.010637   1.948 0.051405 .  
## study4:bmi     -0.007777  0.992253  0.013034  -0.597 0.550730    
## study2:egfr    -0.006758  0.993265  0.013604  -0.497 0.619362    
## study3:egfr     0.028072  1.028470  0.010143   2.768 0.005646 ** 
## study4:egfr     0.015666  1.015789  0.010443   1.500 0.133587    
## study2:log_uac  0.313734  1.368526  0.080540   3.895 9.80e-05 ***
## study3:log_uac  0.076192  1.079170  0.046278   1.646 0.099680 .  
## study4:log_uac -0.268991  0.764150  0.040810  -6.591 4.36e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## study2           0.01121   89.24290  0.001635   0.07681
## study3           0.29608    3.37742  0.080390   1.09050
## study4          11.96613    0.08357  3.399388  42.12176
## sex              0.86736    1.15292  0.681456   1.10399
## age              0.97249    1.02829  0.964431   0.98062
## bmi              0.97088    1.02999  0.953806   0.98826
## sbp              1.00901    0.99107  1.006571   1.01145
## dm               1.37970    0.72480  1.233278   1.54350
## cvd              1.08375    0.92272  0.967274   1.21425
## egfr             0.89647    1.11549  0.880764   0.91245
## log_uac          1.53443    0.65171  1.422175   1.65554
## study2:sex       0.54384    1.83876  0.362397   0.81614
## study3:sex       0.78327    1.27670  0.581945   1.05425
## study4:sex       0.67964    1.47136  0.496170   0.93096
## study2:age       1.02679    0.97391  1.011322   1.04250
## study3:age       1.00235    0.99765  0.991413   1.01341
## study4:age       0.98790    1.01225  0.977463   0.99845
## study2:bmi       1.01586    0.98439  0.974164   1.05934
## study3:bmi       1.02094    0.97949  0.999874   1.04244
## study4:bmi       0.99225    1.00781  0.967225   1.01793
## study2:egfr      0.99326    1.00678  0.967131   1.02010
## study3:egfr      1.02847    0.97232  1.008226   1.04912
## study4:egfr      1.01579    0.98446  0.995209   1.03680
## study2:log_uac   1.36853    0.73071  1.168684   1.60254
## study3:log_uac   1.07917    0.92664  0.985594   1.18163
## study4:log_uac   0.76415    1.30864  0.705409   0.82778
## 
## Concordance= 0.873  (se = 0.004 )
## Rsquare= 0.224   (max possible= 0.884 )
## Likelihood ratio test= 3287  on 26 df,   p=<2e-16
## Wald test            = 2575  on 26 df,   p=<2e-16
## Score (logrank) test = 3589  on 26 df,   p=<2e-16
regTermTest(mfit_esrd, "study:sex")
## Wald test for study:sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## F =  42.15648  on  3  and  12960  df: p= < 2.22e-16
regTermTest(mfit_esrd, "study:age")
## Wald test for study:age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## F =  6.375559  on  3  and  12960  df: p= 0.00025904
regTermTest(mfit_esrd, "study:bmi")
## Wald test for study:bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## F =  3.040171  on  3  and  12960  df: p= 0.027767
regTermTest(mfit_esrd, "study:egfr")
## Wald test for study:egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## F =  4.965735  on  3  and  12960  df: p= 0.0019138
regTermTest(mfit_esrd, "study:log_uac")
## Wald test for study:log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex + study * age + 
##     study * bmi + sbp + dm + cvd + study * egfr + study * log_uac, 
##     data = ckd_studies)
## F =  5.775913  on  3  and  12960  df: p= 0.00060823
# Checking PH assumption:
temp_esrd <- cox.zph(mfit_esrd)
print(temp_esrd)
##                      rho    chisq        p
## study2          1.51e-02 3.99e-01 0.527596
## study3          9.43e-03 1.48e-01 0.700615
## study4          3.85e-02 2.46e+00 0.116968
## sex             2.38e-02 9.02e-01 0.342288
## age             1.35e-02 2.38e-01 0.625814
## bmi             3.52e-02 2.51e+00 0.112851
## sbp            -2.78e-02 1.08e+00 0.298677
## dm             -3.01e-02 1.42e+00 0.233284
## cvd            -9.55e-03 1.39e-01 0.709177
## egfr            8.38e-02 1.30e+01 0.000317
## log_uac        -4.21e-03 3.49e-02 0.851719
## study2:sex     -4.75e-05 3.45e-06 0.998517
## study3:sex     -1.79e-02 4.96e-01 0.481070
## study4:sex     -2.87e-02 1.29e+00 0.256669
## study2:age     -4.50e-03 3.07e-02 0.861006
## study3:age      3.71e-02 1.88e+00 0.170792
## study4:age      5.15e-03 3.39e-02 0.853944
## study2:bmi      3.75e-02 2.62e+00 0.105569
## study3:bmi     -1.87e-02 6.79e-01 0.409881
## study4:bmi     -8.35e-03 1.38e-01 0.710199
## study2:egfr    -3.72e-02 2.64e+00 0.104004
## study3:egfr    -4.49e-02 3.67e+00 0.055305
## study4:egfr    -6.05e-02 6.91e+00 0.008549
## study2:log_uac -2.12e-02 8.07e-01 0.368882
## study3:log_uac  7.85e-03 1.13e-01 0.736431
## study4:log_uac -1.53e-02 4.45e-01 0.504550
## GLOBAL                NA 5.84e+01 0.000272
plot(temp_esrd)

# Strategy II: using LASSO to perform variable selection: 
# (least absolute shrinkage and selection operator) 
library("glmnet")
## Loading required package: foreach
## Loaded glmnet 2.0-16
attach(ckd_studies)

# generate multiplicative interactions between covariates and group:
ckd_studies$sex_interaction<-(sex)*(group)
ckd_studies$age_interaction<-(age)*(group)
ckd_studies$bmi_interaction<-(bmi)*(group)
ckd_studies$sbp_interaction<-(sbp)*(group)
ckd_studies$dm_interaction<-(dm)*(group) 
ckd_studies$cvd_interaction<-(cvd)*(group)
ckd_studies$egfr_interaction<-(egfr)*(group)
ckd_studies$log_uac_interaction<-(log_uac)*(group)

# generate x: matrix containing all covariates that will
# be included in the model:
x<-as.matrix(ckd_studies[,c(-1, -2, -3, -5, 
                            -8, -10, -12, -15, -17, -18,
                            -19, -20, -21, -22, -23, 
                            -24, -25, -26, -27)])

# fit model:
cv.fit_esrd <- cv.glmnet(x, Surv(tesrd_, esrd_), family="cox", maxit = 10000)
fit_esrd <- glmnet(x, Surv(tesrd_, esrd_), family="cox", maxit = 10000)
plot(cv.fit_esrd)

cv.fit_esrd$lambda.min
## [1] 0.0002354158
Coefficients_esrd <- coef(fit_esrd, s = cv.fit_esrd$lambda.min)
Active.Index_esrd <- which(Coefficients_esrd != 0)
Active.Coefficients_esrd  <- Coefficients_esrd[Active.Index_esrd]
Active.Coefficients_esrd
##  [1]  0.6045688360 -0.0876905894 -0.0132464080 -0.1506681903  0.4339491330
##  [6]  0.0086741075  0.0099173396  1.2247056933 -0.0971022392 -0.0065748449
## [11] -0.0020770361  0.1435607507 -0.1075958069  0.0006315415 -0.0976142356
coef_esrd <-c(Active.Coefficients_esrd)
rows <- c("log_uac", "egfr", "age", "sex", "dm", "cvd", "bmi", "sbp", "sex_interaction", "age_interaction", "bmi_interaction", "sbp_interaction", "dm_interaction", "cvd_interaction", "egfr_interaction", "log_uac_interaction")
sig_coef_esrd <-c(coef_esrd<0.0002835591)
esrd_results <-data.frame(cbind(rows, coef_esrd, sig_coef_esrd))
## Warning in cbind(rows, coef_esrd, sig_coef_esrd): number of rows of result
## is not a multiple of vector length (arg 2)
# list of selected variables:
table(esrd_results$rows, esrd_results$sig_coef_esrd)
##                      
##                       FALSE TRUE
##   age                     0    1
##   age_interaction         0    1
##   bmi                     1    0
##   bmi_interaction         0    1
##   cvd                     1    0
##   cvd_interaction         1    0
##   dm                      1    0
##   dm_interaction          0    1
##   egfr                    0    1
##   egfr_interaction        0    1
##   log_uac                 1    0
##   log_uac_interaction     1    0
##   sbp                     1    0
##   sbp_interaction         1    0
##   sex                     0    1
##   sex_interaction         0    1

5. Propensity Score Balanced Analysis

Using the Twang package to estimate propensity scores we refit the Cox Proportional Hazards Models across balanced study groups and tested for the interactions by study.

library(twang)
## Loading required package: gbm
## Loaded gbm 2.1.4
## Loading required package: xtable
## Loading required package: lattice
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
library(gbm)
library(survminer)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
## 
##     layer
## Loading required package: ggpubr
## Loading required package: magrittr
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
prop_score<-as.data.frame(ckd_studies[,c(-2, -3, -5, 
                            -8, -10, -12, -15, -17, -18,
                            -19, -20, -21, -22, -23, 
                            -24, -25, -26)])

# generating factor for study groups
prop_score$studies<-factor(prop_score$group)

attach(prop_score)
## The following objects are masked from ckd_studies:
## 
##     age, bmi, cvd, dm, egfr, group, log_uac, sbp, sex, study,
##     tdeath_
set.seed(123)

# mnps: multinomial propensity scores
mnps.ckd <- mnps(studies ~ egfr + log_uac + age + sex + 
                   dm + bmi + sbp + cvd,
                   data = ckd_studies,
                   estimand = "ATE",
                   verbose = FALSE,
                   stop.method = c("es.mean", "ks.mean"),
                   n.trees = 1000)
## Warning in ps(formula = currFormula, data = currDat, n.trees =
## n.trees[i], : Optimal number of iterations is close to the specified
## n.trees. n.trees is likely set too small and better balance might be
## obtainable by setting n.trees to be larger.

## Warning in ps(formula = currFormula, data = currDat, n.trees =
## n.trees[i], : Optimal number of iterations is close to the specified
## n.trees. n.trees is likely set too small and better balance might be
## obtainable by setting n.trees to be larger.

## Warning in ps(formula = currFormula, data = currDat, n.trees =
## n.trees[i], : Optimal number of iterations is close to the specified
## n.trees. n.trees is likely set too small and better balance might be
## obtainable by setting n.trees to be larger.

## Warning in ps(formula = currFormula, data = currDat, n.trees =
## n.trees[i], : Optimal number of iterations is close to the specified
## n.trees. n.trees is likely set too small and better balance might be
## obtainable by setting n.trees to be larger.

## Warning in ps(formula = currFormula, data = currDat, n.trees =
## n.trees[i], : Optimal number of iterations is close to the specified
## n.trees. n.trees is likely set too small and better balance might be
## obtainable by setting n.trees to be larger.
# estimating weights
# ATE: average treatment effects on the population
# Applying ATE results on how, on average,  the outcome of interest would change if everyone in the population of interest had been assigned to a particular treatment relative to if they had all received another single treatment. (As opposed to ATT: average treatment effects on the treated).

# Plot 1:
# check number of iterations necessary to achieve balance. 
plot(mnps.ckd, plots = 1)

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Plot 2:
# examine the overlap of propensity score distributions (they should overlap).
# equivalent to testing the positivity assumption:
# every individual has a non-zero probability of belonging to any of the 6 groups.
# This assumption was not adequately met!
plot(mnps.ckd, plots = 2, subset = "es.mean")

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# attaching weights (w4) to each individual:
library(survey)
ckd_studies$w4 <- get.weights(mnps.ckd, stop.method = "es.mean")
ckd_studies$study<-as.factor(ckd_studies$group)

# fitting Cox PH model to evaluate the interactions by study on the association between baseline characteristics and outcomes across propensity-score balanced groups:

# ESRD:
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*age, ckd_studies, weights = w4)
summary(psfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * age, data = ckd_studies, 
##     weights = w4)
## 
##   n= 12986, number of events= 1519 
## 
##                 coef exp(coef)  se(coef)       z Pr(>|z|)    
## study2     -3.061727  0.046807  0.256313 -11.945  < 2e-16 ***
## study3     -0.280798  0.755181  0.164433  -1.708  0.08770 .  
## study4     -0.021691  0.978542  0.169934  -0.128  0.89843    
## age        -0.032115  0.968395  0.002016 -15.926  < 2e-16 ***
## study2:age  0.042518  1.043435  0.004214  10.089  < 2e-16 ***
## study3:age  0.008858  1.008897  0.002845   3.114  0.00185 ** 
## study4:age -0.006470  0.993551  0.002880  -2.247  0.02467 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## study2       0.04681    21.3644   0.02832   0.07735
## study3       0.75518     1.3242   0.54712   1.04236
## study4       0.97854     1.0219   0.70135   1.36530
## age          0.96840     1.0326   0.96458   0.97223
## study2:age   1.04343     0.9584   1.03485   1.05209
## study3:age   1.00890     0.9912   1.00329   1.01454
## study4:age   0.99355     1.0065   0.98796   0.99918
## 
## Concordance= 0.649  (se = 0.009 )
## Rsquare= 0.092   (max possible= 0.999 )
## Likelihood ratio test= 1247  on 7 df,   p=<2e-16
## Wald test            = 1225  on 7 df,   p=<2e-16
## Score (logrank) test = 1323  on 7 df,   p=<2e-16
# test overall significance of the interaction term:
regTermTest(psfit_esrd, "study:age")
## Wald test for study:age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * age, data = ckd_studies, 
##     weights = w4)
## F =  131.9568  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * age, data = ckd_studies, 
##     weights = w4)
## F =  73.89563  on  2  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "age")
## Wald test for age
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * age, data = ckd_studies, 
##     weights = w4)
## F =  0.01629358  on  1  and  12979  df: p= 0.89843
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*sex, ckd_studies, weights = w4)
summary(psfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sex, data = ckd_studies, 
##     weights = w4)
## 
##   n= 12986, number of events= 1519 
## 
##                coef exp(coef) se(coef)      z Pr(>|z|)    
## study2     -0.34215   0.71024  0.05971 -5.731 1.00e-08 ***
## study3      0.38393   1.46804  0.04613  8.322  < 2e-16 ***
## study4     -0.38812   0.67833  0.05090 -7.626 2.42e-14 ***
## sex        -0.02706   0.97330  0.05834 -0.464    0.643    
## study2:sex -0.68386   0.50467  0.11574 -5.908 3.45e-09 ***
## study3:sex -0.30041   0.74051  0.07658 -3.923 8.75e-05 ***
## study4:sex -0.36017   0.69756  0.08645 -4.166 3.10e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## study2        0.7102     1.4080    0.6318    0.7984
## study3        1.4680     0.6812    1.3411    1.6070
## study4        0.6783     1.4742    0.6139    0.7495
## sex           0.9733     1.0274    0.8681    1.0912
## study2:sex    0.5047     1.9815    0.4022    0.6332
## study3:sex    0.7405     1.3504    0.6373    0.8604
## study4:sex    0.6976     1.4336    0.5888    0.8264
## 
## Concordance= 0.613  (se = 0.009 )
## Rsquare= 0.052   (max possible= 0.999 )
## Likelihood ratio test= 694.8  on 7 df,   p=<2e-16
## Wald test            = 650.4  on 7 df,   p=<2e-16
## Score (logrank) test = 694.8  on 7 df,   p=<2e-16
# test overall significance of the interaction term:
regTermTest(psfit_esrd, "study:sex")
## Wald test for study:sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex, data = ckd_studies, 
##     weights = w4)
## F =  31.45658  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex, data = ckd_studies, 
##     weights = w4)
## F =  89.69155  on  2  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "sex")
## Wald test for sex
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sex, data = ckd_studies, 
##     weights = w4)
## F =  58.15398  on  1  and  12979  df: p= 2.5922e-14
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*dm, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * dm, data = ckd_studies, 
##     weights = w4)
## 
##                coef exp(coef)  se(coef)       z        p
## study2    -0.670200  0.511606  0.074475  -8.999  < 2e-16
## study3    -0.001832  0.998170  0.055269  -0.033   0.9736
## study4    -0.719313  0.487087  0.060339 -11.921  < 2e-16
## dm         0.231802  1.260870  0.055992   4.140 3.47e-05
## study2:dm  0.248894  1.282605  0.101348   2.456   0.0141
## study3:dm  0.466476  1.594366  0.074516   6.260 3.85e-10
## study4:dm  0.391459  1.479138  0.082605   4.739 2.15e-06
## 
## Likelihood ratio test=919.6  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall significance of the interaction term:
regTermTest(psfit_esrd, "study:dm")
## Wald test for study:dm
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * dm, data = ckd_studies, 
##     weights = w4)
## F =  83.68907  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * dm, data = ckd_studies, 
##     weights = w4)
## F =  47.07251  on  2  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "dm")
## Wald test for dm
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * dm, data = ckd_studies, 
##     weights = w4)
## F =  142.1152  on  1  and  12979  df: p= < 2.22e-16
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*cvd, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * cvd, data = ckd_studies, 
##     weights = w4)
## 
##                coef exp(coef) se(coef)       z        p
## study2     -0.65623   0.51881  0.05997 -10.942  < 2e-16
## study3      0.10187   1.10724  0.04489   2.269 0.023254
## study4     -0.46359   0.62902  0.04729  -9.803  < 2e-16
## cvd        -0.16533   0.84762  0.06158  -2.685 0.007262
## study2:cvd  0.40156   1.49415  0.11241   3.572 0.000354
## study3:cvd  0.48963   1.63170  0.07904   6.195 5.84e-10
## study4:cvd -0.31166   0.73223  0.09775  -3.188 0.001431
## 
## Likelihood ratio test=653.2  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall effect of multilevel variable group:
regTermTest(psfit_esrd, "study:cvd")
## Wald test for study:cvd
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * cvd, data = ckd_studies, 
##     weights = w4)
## F =  18.78304  on  3  and  12979  df: p= 3.7532e-12
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * cvd, data = ckd_studies, 
##     weights = w4)
## F =  87.09621  on  2  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "cvd")
## Wald test for cvd
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * cvd, data = ckd_studies, 
##     weights = w4)
## F =  96.09371  on  1  and  12979  df: p= < 2.22e-16
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*bmi, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * bmi, data = ckd_studies, 
##     weights = w4)
## 
##                 coef exp(coef)  se(coef)      z        p
## study2     -1.779042  0.168800  0.276818 -6.427 1.30e-10
## study3     -0.167861  0.845471  0.167746 -1.001  0.31698
## study4      0.066158  1.068396  0.211760  0.312  0.75472
## bmi        -0.027013  0.973348  0.004608 -5.862 4.56e-09
## study2:bmi  0.044181  1.045172  0.010029  4.405 1.06e-05
## study3:bmi  0.015154  1.015270  0.005739  2.640  0.00828
## study4:bmi -0.022163  0.978081  0.007544 -2.938  0.00331
## 
## Likelihood ratio test=683.8  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall effect of multilevel variable group:
regTermTest(psfit_esrd, "study:bmi")
## Wald test for study:bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * bmi, data = ckd_studies, 
##     weights = w4)
## F =  16.69821  on  3  and  12979  df: p= 7.9887e-11
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * bmi, data = ckd_studies, 
##     weights = w4)
## F =  21.87493  on  2  and  12979  df: p= 3.2796e-10
regTermTest(psfit_esrd, "bmi")
## Wald test for bmi
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * bmi, data = ckd_studies, 
##     weights = w4)
## F =  0.09760691  on  1  and  12979  df: p= 0.75473
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*sbp, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * sbp, data = ckd_studies, 
##     weights = w4)
## 
##                 coef exp(coef)  se(coef)      z        p
## study2     -1.384552  0.250436  0.332994 -4.158 3.21e-05
## study3     -0.510379  0.600268  0.234448 -2.177 0.029485
## study4      0.956139  2.601631  0.262366  3.644 0.000268
## sbp         0.017070  1.017216  0.001351 12.636  < 2e-16
## study2:sbp  0.005920  1.005938  0.002353  2.516 0.011860
## study3:sbp  0.005548  1.005563  0.001672  3.319 0.000905
## study4:sbp -0.011100  0.988961  0.001898 -5.847 4.99e-09
## 
## Likelihood ratio test=1328  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall effect of multilevel variable group:
regTermTest(psfit_esrd, "study:sbp")
## Wald test for study:sbp
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sbp, data = ckd_studies, 
##     weights = w4)
## F =  275.6837  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sbp, data = ckd_studies, 
##     weights = w4)
## F =  8.703677  on  2  and  12979  df: p= 0.00016695
regTermTest(psfit_esrd, "sbp")
## Wald test for sbp
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * sbp, data = ckd_studies, 
##     weights = w4)
## F =  13.28084  on  1  and  12979  df: p= 0.00026918
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*egfr, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * egfr, data = ckd_studies, 
##     weights = w4)
## 
##                  coef exp(coef)  se(coef)       z        p
## study2       0.627035  1.872051  0.169211   3.706 0.000211
## study3       0.704979  2.023804  0.122954   5.734 9.83e-09
## study4      -0.113947  0.892305  0.135439  -0.841 0.400171
## egfr        -0.079608  0.923478  0.003224 -24.694  < 2e-16
## study2:egfr -0.046114  0.954933  0.006523  -7.070 1.55e-12
## study3:egfr -0.005258  0.994755  0.004118  -1.277 0.201668
## study4:egfr -0.008288  0.991746  0.004624  -1.793 0.073043
## 
## Likelihood ratio test=4064  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall effect of multilevel variable group:
regTermTest(psfit_esrd, "study:egfr")
## Wald test for study:egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * egfr, data = ckd_studies, 
##     weights = w4)
## F =  728.4761  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * egfr, data = ckd_studies, 
##     weights = w4)
## F =  17.50941  on  2  and  12979  df: p= 2.5468e-08
regTermTest(psfit_esrd, "egfr")
## Wald test for egfr
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * egfr, data = ckd_studies, 
##     weights = w4)
## F =  0.7078139  on  1  and  12979  df: p= 0.40019
psfit_esrd <-coxph(Surv(tesrd_, esrd_) ~ study*log_uac, ckd_studies, weights = w4)
psfit_esrd
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * log_uac, data = ckd_studies, 
##     weights = w4)
## 
##                    coef exp(coef) se(coef)       z        p
## study2         -2.87935   0.05617  0.29683  -9.700  < 2e-16
## study3          0.24349   1.27569  0.15918   1.530    0.126
## study4          2.09898   8.15786  0.13585  15.450  < 2e-16
## log_uac         0.57410   1.77553  0.01817  31.589  < 2e-16
## study2:log_uac  0.32547   1.38468  0.04101   7.936 2.09e-15
## study3:log_uac  0.02073   1.02094  0.02322   0.893    0.372
## study4:log_uac -0.36926   0.69125  0.02040 -18.100  < 2e-16
## 
## Likelihood ratio test=5475  on 7 df, p=< 2.2e-16
## n= 12986, number of events= 1519
# test overall effect of multilevel variable group:
regTermTest(psfit_esrd, "study:log_uac")
## Wald test for study:log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * log_uac, data = ckd_studies, 
##     weights = w4)
## F =  1085.725  on  3  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "study")
## Wald test for study
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * log_uac, data = ckd_studies, 
##     weights = w4)
## F =  59.51278  on  2  and  12979  df: p= < 2.22e-16
regTermTest(psfit_esrd, "log_uac")
## Wald test for log_uac
##  in coxph(formula = Surv(tesrd_, esrd_) ~ study * log_uac, data = ckd_studies, 
##     weights = w4)
## F =  238.7094  on  1  and  12979  df: p= < 2.22e-16

6. Latent Class Analysis

We used an unsupervised machine learning method to organize the participants in groups according to their baseline characteristics, but despite of study group or origin. We Used the depmixS4 R-package34 for hidden Markov models we fitted a latent class model and defined three latent classes. Then, we tested for the interaction by study on the association between latent class and outcome using Cox Proportional Hazards Models.

library(depmixS4)
## Loading required package: nnet
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: Rsolnp
library(haven)
attach(ckd_studies)
## The following objects are masked from prop_score:
## 
##     age, age_interaction, bmi, bmi_interaction, cvd,
##     cvd_interaction, dm, dm_interaction, egfr, egfr_interaction,
##     group, log_uac, log_uac_interaction, sbp, sbp_interaction,
##     sex, sex_interaction, study, tdeath_
## The following objects are masked from ckd_studies (pos = 19):
## 
##     age, age_cat, alb, bmi, bmi_cat, ca, chol, ckd_stage, cvd,
##     death_, dm, egfr, esrd_, group, hb, hb_cat, hyp, log_uac,
##     phos, race, sbp, sbp_cat, sex, study, tdeath_, tesrd_, uac,
##     uac_cat
# Fitting model with 4 latent classes
instart=c(0.1, 0.1, 0.1, 0.1, 0.1)
set.seed(3)
mod <- mix (list (egfr ~1, log_uac ~1, age ~1, 
                  sex ~ 1, dm ~ 1, 
                  cvd ~ 1, bmi ~ 1, sbp ~1), 
            data = ckd_studies, 
            nstates = 4,
            family = list (gaussian(), gaussian(), gaussian (), multinomial("identity"), multinomial("identity"), multinomial("identity"), gaussian(), gaussian ()),
            repstart=c(rep(c(0.9, 0.1),4),rep(c(0.1, 0.9),4)),
            initdata=ckd_studies)
fmod3<- fit(mod, verbose=FALSE)
## Warning in em.mix(object = object, maxit = emcontrol$maxit, tol =
## emcontrol$tol, : likelihood decreased on iteration 22
fmod3
## Convergence info: likelihood decreased in EM iteration; stopped without convergence. 
## 'log Lik.' -117373.6 (df=55)
## AIC:  234857.2 
## BIC:  235268.1
summary(fmod3)
## Mixture probabilities model 
##       pr1       pr2       pr3       pr4 
## 0.2975176 0.1721763 0.3149243 0.2153819 
## 
## Response parameters 
## Resp 1 : gaussian 
## Resp 2 : gaussian 
## Resp 3 : gaussian 
## Resp 4 : multinomial 
## Resp 5 : multinomial 
## Resp 6 : multinomial 
## Resp 7 : gaussian 
## Resp 8 : gaussian 
##     Re1.(Intercept)   Re1.sd Re2.(Intercept)       Re2.sd Re3.(Intercept)
## St1        31.76839 10.27502        5.183732 1.933168e+00        68.52570
## St2        38.02064 11.48780        4.978690 2.277745e+00        59.59399
## St3        38.55606 10.51402       -2.302585 1.773957e-15        70.99953
## St4        38.27178 11.91515        4.372476 2.154948e+00        54.29693
##        Re3.sd     Re4.0     Re4.1     Re5.0     Re5.1     Re6.0      Re6.1
## St1  8.161157 0.7322467 0.2677533 0.4468779 0.5531221 0.5379892 0.46201084
## St2 10.347083 0.4491316 0.5508684 0.2769553 0.7230447 0.6160089 0.38399106
## St3 10.698503 0.5455293 0.4544707 0.6742963 0.3257037 0.6887234 0.31127658
## St4 12.863796 0.5422743 0.4577257 0.8447108 0.1552892 0.9487175 0.05128254
##     Re7.(Intercept)   Re7.sd Re8.(Intercept)   Re8.sd
## St1        27.19610 4.453715        134.9580 18.98507
## St2        35.71332 9.162761        136.8720 25.00465
## St3        29.13699 5.709200        133.7068 21.25859
## St4        25.49808 4.519986        121.7561 14.33851
# saving class assignments for model with 4 latent classes:
latentclass <- depmixS4::posterior(fmod3)
head(round(latentclass ,2))
##   state   S1   S2 S3   S4
## 1     4 0.25 0.02  0 0.73
## 2     1 0.91 0.02  0 0.07
## 3     1 0.93 0.07  0 0.00
## 4     1 0.88 0.04  0 0.08
## 5     1 0.82 0.12  0 0.06
## 6     1 0.87 0.06  0 0.07
latentclass$state <- as.factor(latentclass$state)
summary(latentclass$state)
##    1    2    3    4 
## 4183 1833 4091 2879
ckd_studies$latentclass<-latentclass$state

# Graphs
library("RColorBrewer")
# Grouped Bar Plot
counts <- table(ckd_studies$latentclass, ckd_studies$group)
barplot(counts, main="Latent Classes Distribution by Study",
        xlab="Studies", col=brewer.pal(n=4, name= "Blues"),
        legend = FALSE)

# fitting Cox PH model including interactions by study on the association between latent classess and outcomes:

# just include latent classes found in all 4 groups:
ckd_studies_fewer<-ckd_studies %>%
  filter(latentclass !="3")

mfit_esrd <-coxph (Surv(tesrd_, esrd_)~ study*ckd_studies_fewer$latentclass, ckd_studies_fewer) 
summary(mfit_esrd)
## Call:
## coxph(formula = Surv(tesrd_, esrd_) ~ study * ckd_studies_fewer$latentclass, 
##     data = ckd_studies_fewer)
## 
##   n= 8895, number of events= 1421 
## 
##                                            coef exp(coef)  se(coef)      z
## study2                                 0.070585  1.073136  0.108449  0.651
## study3                                 0.101927  1.107302  0.100062  1.019
## study4                                -0.155239  0.856210  0.107760 -1.441
## ckd_studies_fewer$latentclass2         0.209338  1.232861  0.141522  1.479
## ckd_studies_fewer$latentclass3               NA        NA  0.000000     NA
## ckd_studies_fewer$latentclass4         0.019374  1.019563  0.150224  0.129
## study2:ckd_studies_fewer$latentclass2 -0.131834  0.876486  0.331136 -0.398
## study3:ckd_studies_fewer$latentclass2 -0.277126  0.757959  0.169509 -1.635
## study4:ckd_studies_fewer$latentclass2 -0.528856  0.589279  0.234280 -2.257
## study2:ckd_studies_fewer$latentclass3        NA        NA  0.000000     NA
## study3:ckd_studies_fewer$latentclass3        NA        NA  0.000000     NA
## study4:ckd_studies_fewer$latentclass3        NA        NA  0.000000     NA
## study2:ckd_studies_fewer$latentclass4 -0.996069  0.369329  0.223902 -4.449
## study3:ckd_studies_fewer$latentclass4 -0.857348  0.424286  0.191722 -4.472
## study4:ckd_studies_fewer$latentclass4  0.007716  1.007746  0.191502  0.040
##                                       Pr(>|z|)    
## study2                                   0.515    
## study3                                   0.308    
## study4                                   0.150    
## ckd_studies_fewer$latentclass2           0.139    
## ckd_studies_fewer$latentclass3              NA    
## ckd_studies_fewer$latentclass4           0.897    
## study2:ckd_studies_fewer$latentclass2    0.691    
## study3:ckd_studies_fewer$latentclass2    0.102    
## study4:ckd_studies_fewer$latentclass2    0.024 *  
## study2:ckd_studies_fewer$latentclass3       NA    
## study3:ckd_studies_fewer$latentclass3       NA    
## study4:ckd_studies_fewer$latentclass3       NA    
## study2:ckd_studies_fewer$latentclass4 8.64e-06 ***
## study3:ckd_studies_fewer$latentclass4 7.76e-06 ***
## study4:ckd_studies_fewer$latentclass4    0.968    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                       exp(coef) exp(-coef) lower .95
## study2                                   1.0731     0.9318    0.8676
## study3                                   1.1073     0.9031    0.9101
## study4                                   0.8562     1.1679    0.6932
## ckd_studies_fewer$latentclass2           1.2329     0.8111    0.9342
## ckd_studies_fewer$latentclass3               NA         NA        NA
## ckd_studies_fewer$latentclass4           1.0196     0.9808    0.7595
## study2:ckd_studies_fewer$latentclass2    0.8765     1.1409    0.4580
## study3:ckd_studies_fewer$latentclass2    0.7580     1.3193    0.5437
## study4:ckd_studies_fewer$latentclass2    0.5893     1.6970    0.3723
## study2:ckd_studies_fewer$latentclass3        NA         NA        NA
## study3:ckd_studies_fewer$latentclass3        NA         NA        NA
## study4:ckd_studies_fewer$latentclass3        NA         NA        NA
## study2:ckd_studies_fewer$latentclass4    0.3693     2.7076    0.2381
## study3:ckd_studies_fewer$latentclass4    0.4243     2.3569    0.2914
## study4:ckd_studies_fewer$latentclass4    1.0077     0.9923    0.6924
##                                       upper .95
## study2                                   1.3273
## study3                                   1.3472
## study4                                   1.0576
## ckd_studies_fewer$latentclass2           1.6270
## ckd_studies_fewer$latentclass3               NA
## ckd_studies_fewer$latentclass4           1.3686
## study2:ckd_studies_fewer$latentclass2    1.6773
## study3:ckd_studies_fewer$latentclass2    1.0567
## study4:ckd_studies_fewer$latentclass2    0.9327
## study2:ckd_studies_fewer$latentclass3        NA
## study3:ckd_studies_fewer$latentclass3        NA
## study4:ckd_studies_fewer$latentclass3        NA
## study2:ckd_studies_fewer$latentclass4    0.5728
## study3:ckd_studies_fewer$latentclass4    0.6178
## study4:ckd_studies_fewer$latentclass4    1.4668
## 
## Concordance= 0.575  (se = 0.007 )
## Rsquare= 0.014   (max possible= 0.94 )
## Likelihood ratio test= 124.8  on 11 df,   p=<2e-16
## Wald test            = 105.9  on 11 df,   p=<2e-16
## Score (logrank) test = 111.5  on 11 df,   p=<2e-16
# test overall significance of the interaction term:
anova(mfit_esrd)
## Analysis of Deviance Table
##  Cox model: response is Surv(tesrd_, esrd_)
## Terms added sequentially (first to last)
## 
##                                     loglik  Chisq Df Pr(>|Chi|)    
## NULL                                -12495                         
## study                               -12489 12.486  3   0.005891 ** 
## ckd_studies_fewer$latentclass       -12461 54.540  2  1.435e-12 ***
## study:ckd_studies_fewer$latentclass -12432 57.829  6  1.241e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Conclusions

We hypothesized that universally assessed baseline characteristics would substantially explain the differences in incidence rates of two clinical outcomes across international follow-up studies of CKD. However, we observed that incidence rates of ESRD and death across studies were not explained after adjustment, indicating that the selected predictors do not fully capture the characteristics that determine within these diverse study populations. Beyond that, we observed that core predictors relate with clinical outcomes in different intensities depending on the study setting.